# Load necessary library
library(pacman)
p_load(
  AER, here, data.table, fst, collapse, purrr, 
  tictoc, furrr, ggplot2
)


# Lets run a simulation 
set.seed(123)
sim_tobit = function(
  i,
  N = 8760, 
  y_func = function(x) 2 + 3*x + 0.25*x^2,
  lb = 0, 
  ub = 2000, 
  sigma = 2, 
  normal = TRUE, 
  summarize = TRUE
){
  # Drawing random variables
  epsilon = rnorm(N)
  x = runif(N, min = 0, max = 100)
  if(normal == TRUE){
    y_raw = y_func(x) + sigma*epsilon
  }else if(normal == FALSE){
    y_raw = fcase(
      x < 50, -100, 
      x < 75, ub/2, 
      x >= 75, ub
    ) + sigma*epsilon
  }else{
    stop('normal messed up')
  }
  y = fcase(
    y_raw >= lb & y_raw <= ub, y_raw, 
    y_raw > ub, ub, 
    y_raw < lb, lb 
  )
  # Compiling into a table
  sim_dt = data.table(i = i, x = x, y = y, y_raw = y_raw, epsilon, normal = normal)
  # Fitting the model 
  mod = tobit(
    formula = y ~ x + I(x^2), 
    left = lb, 
    right = ub,
    data = sim_dt
  )
  # Generate fitted values 
  epsilon_fit = rnorm(N,mean = 0, sd = exp(log(mod$scale)))
  sim_dt[,':='(
    epsilon_fit = epsilon_fit, 
    y_hat_raw = fitted(mod) + epsilon_fit
  )]
  # Draw new epsilons 
  sim_dt[,y_hat := fcase(
    y_hat_raw >= lb & y_hat_raw <= ub, y_hat_raw, 
    y_hat_raw > ub, ub, 
    y_hat_raw < lb, lb 
  )]
  if(summarize == TRUE){
    # Summarize
    result_dt = fmean(gby(sim_dt, i, normal))[,':='(
      scale_hat = mod$scale
    )]
  }else{
    result_dt = sim_dt
  }
  return(result_dt)
}

# Running it 4000 times 
set.seed(218)
no_cores = availableCores()-1
plan(multisession, workers = no_cores)
tic()
result_dt = rbind(
  future_map_dfr(
    1:4000, 
    sim_tobit, 
    normal = TRUE
  ),
  future_map_dfr(
    1:4000, 
    sim_tobit, 
    normal = FALSE
  )
)
toc()

result_dt[,normal_n := fifelse(normal == TRUE, 'Normal errors', 'Non-normal errors')]
# Plotting the results 
tobit_pred_p = 
  ggplot(result_dt, aes(x = y, y = y_hat)) + 
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') + 
  geom_point(alpha= 0.2) + 
  #geom_smooth() +
  theme_minimal() +
  labs(
    y = 'Average Prediction', 
    x = 'Average Outcome', 
    caption = ""
  ) + 
  facet_wrap(~normal_n, scales = 'free')
ggsave(
  tobit_pred_p, 
  filename = here('figures/electricity-generation/old/under-prediction/tobit-sim.jpeg'), 
  width = 6, height = 3
)
# Distribution of differences 
result_dt[,.((sum(y)-sum(y_hat))/sum(y))]
ggplot(result_dt, aes(x = y - y_hat)) + 
  geom_histogram(bins = 100) +
  theme_minimal()

# Just grabbing an example from each type 
set.seed(123)
sim_dt = rbind(
  sim_tobit(i =1, normal = TRUE, summarize = FALSE), 
  sim_tobit(i = 1, normal = FALSE, summarize = FALSE)
)
# What does distribution of y look like 
ggplot(sim_dt, aes(x = y)) + 
  geom_histogram(bins  = 100) + 
  theme_minimal() + 
  facet_wrap(~normal)
ggplot(sim_dt, aes(x = y-y_hat)) + 
  geom_histogram(bins  = 100) + 
  theme_minimal() + 
  facet_wrap(~normal, scales ='free')

